load data¶
root: location of the csv files. fromDt and toDt are the start and end dates of the range to load.
In [1]:
import sys
sys.path.append('../')
import dataframeLoader as dfl
import pandas as pd
from importlib import reload
reload(dfl)
# Where the csv exports live, and the date window to load.
root = '../../.dataDir'
fromDt, toDt = '2024-09-26', '2024-10-05'
# Prometheus metrics to load. The wider list previously used here was:
# ['cpu_used', 'download_workers_count', 'memory_used', 'task_queue_length',
#  'infra_access_latency', 'pod_cpu_usage', 'pod_memory_usage']
metricsArr = [
    'cpu_used',
    'task_queue_length',
    'memory_used',
]
daterange = [fromDt, toDt]
# Build the combined time-series dataframe consumed by the report cell below.
df = dfl.loadApplianceTimeSeriesData(root, metricsArr, daterange)
loading Unstrctured Data from file: SCANPROC-*.csv
loading Strctured Data from file: STRUCTURED-*.csv
processing securiti_appliance_cpu_used-max*.csv
processing securiti_appliance_cpu_used-avg*.csv
processing securiti_appliance_task_queue_length-max*.csv
processing securiti_appliance_task_queue_length-avg*.csv
processing securiti_appliance_memory_used-max*.csv
processing securiti_appliance_memory_used-avg*.csv
loading Unstrctured Data from file: UNSTRUCTURED-*.csv
Generate plotly report¶
appliance_id: unique identifier of the appliance.
In [2]:
# Re-import dataframeLoader so edits made to it since the last run take effect.
reload(dfl)
appliance_id = '58e98e10-1b19-4c84-93c0-db2ad5903b80'
# Keep only the rows that belong to the selected appliance.
dfp = df[df['appliance_id'] == appliance_id]
# To plot every metric present in the dataframe instead, use:
# metrics_category_order = list(dfp.metrics.unique())
# Metrics to show, mapped to their chart descriptions. Order is preserved.
metrics_category_order = {
    # "Indicator": "Chart Description"
    "uniqPodCount": "Scheduled Download workers by datasource",
    "cpu_used_avg": "Average CPU by Appliance Node/VM",
    "memory_used_avg": "Average Memory by Appliance Node/VM",
    "fileDownloadTimeInHrs": "Time spent by connectors in downloading files for scanning",
    "IdleTimeInHrs": "Cumulative time spent waiting by (all) download workers by datasource",
    "scanTimeInHrs": "Cumulative time spent scanning by (all) download workers by datasource",
    # NOTE(review): label changed from "Gigabits" to "Gigabytes" to agree with
    # the "GB" suffix of the metric name — confirm against dataframeLoader.
    "dataScannedinGB": "Data scanned in Gigabytes per hour",
    "numberOfColsScanned": "Number of structured data columns scanned per hour",
    "numberOfChunksScanned": "Number of structured data row chunks (of 64 rows) scanned per hour",
    "numFilesScanned": "Number of files/tables scanned per hour",
    "avgFileSizeInMB": "Average size of file or table scanned",
    "task_queue_length_avg": "Average temporary task queue length (indicator of file tasks in queue for download or scanning)",
}
title = f'Appliance plot for appliance_id {appliance_id} between {fromDt} and {toDt}'
fig = dfl.plotMetricsFacetForApplianceId(
    dfp, title, metrics_category_order, 'node_ip', 'GraphColor'
)
fig.show()